suppressPackageStartupMessages({
library(reticulate)
library(SingleCellExperiment)
library(scuttle)
library(scater)
library(scran)
library(scRNAseq)
library(zellkonverter)
})The Pancreas data set (Bastidas-Ponce et al.
2019) comes from a study of endocrine development in mouse, and
was generated with the 10x Genomics Chromium platform, using v2
chemistry. It is commonly used as an example data set for illustrating
RNA velocity analyses. Here, we obtain it from the scVelo
python package and convert it to a SingleCellExperiment
object using the zellkonverter
Bioconductor package.
## Set conda environment to have access to scVelo
reticulate::use_condaenv(basilisk:::.obtainEnvironmentPath(velociraptor:::velo.env))
scv <- reticulate::import("scvelo")
## Load data set and create counts assay
pancreas <- zellkonverter::AnnData2SCE(scv$datasets$pancreas())
assay(pancreas, "counts") <- assay(pancreas, "X")
## Calculate log-normalized counts
pancreas <- scuttle::logNormCounts(pancreas)
## Find highly variable genes
dec <- scran::modelGeneVar(pancreas)
top.hvgs <- scran::getTopHVGs(dec, n = 2000)
metadata(pancreas)$HVGs <- top.hvgs
## Plot provided UMAP representation
scater::plotReducedDim(pancreas, dimred = "X_umap", colour_by = "clusters")
pancreas
#> class: SingleCellExperiment
#> dim: 27998 3696
#> metadata(6): clusters_coarse_colors clusters_colors ... pca HVGs
#> assays(5): X spliced unspliced counts logcounts
#> rownames(27998): Xkr4 Gm37381 ... Gm20837 Erdr1
#> rowData names(1): highly_variable_genes
#> colnames(3696): AAACCTGAGAGGGATA AAACCTGAGCCTTGAT ... TTTGTCATCGAATGCT
#> TTTGTCATCTGTTTGT
#> colData names(5): clusters_coarse clusters S_score G2M_score sizeFactor
#> reducedDimNames(2): X_pca X_umap
#> mainExpName: NULL
#> altExpNames(0):
## Save SingleCellExperiment
saveRDS(pancreas, file = "pancreas.rds")
## Save AnnData object
zellkonverter::writeH5AD(pancreas, file = "pancreas_AnnData.h5ad",
X_name = "counts")The Spermatogenesis data set (Hermann et al. 2018) consists of steady-state spermatogenic cells from an adult mouse, and was obtained with 10x Genomics Chromium v2 chemistry.
Spliced and unspliced counts for this data set are available via the
scRNAseq
Bioconductor package. See here
for a record of how the data was processed to get to the
SingleCellExperiment object from the raw FASTQ files.
## Load data set
spermatogenesis <- HermannSpermatogenesisData()
#> snapshotDate(): 2021-10-19
#> see ?scRNAseq and browseVignettes('scRNAseq') for documentation
#> loading from cache
#> see ?scRNAseq and browseVignettes('scRNAseq') for documentation
#> loading from cache
#> see ?scRNAseq and browseVignettes('scRNAseq') for documentation
#> loading from cache
## Add a 'counts' assay for easier downstream analysis
## (here equal to the 'spliced' assay)
assay(spermatogenesis, "counts") <- assay(spermatogenesis, "spliced")
## Calculate log-normalized counts
spermatogenesis <- scuttle::logNormCounts(spermatogenesis)
## Find highly variable genes
dec <- scran::modelGeneVar(spermatogenesis)
#> Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
#> collapsing to unique 'x' values
top.hvgs <- scran::getTopHVGs(dec, n = 2000)
metadata(spermatogenesis)$HVGs <- top.hvgs
## Perform dimensionality reduction
set.seed(100)
spermatogenesis <- scater::runPCA(spermatogenesis, subset_row = top.hvgs)
spermatogenesis <- scater::runTSNE(spermatogenesis, dimred = "PCA")
## Plot tSNE representation
scater::plotTSNE(spermatogenesis, colour_by = "celltype")
spermatogenesis
#> class: SingleCellExperiment
#> dim: 54448 2325
#> metadata(1): HVGs
#> assays(4): spliced unspliced counts logcounts
#> rownames(54448): ENSMUSG00000102693.1 ENSMUSG00000064842.1 ...
#> ENSMUSG00000064369.1 ENSMUSG00000064372.1
#> rowData names(0):
#> colnames(2325): CCCATACTCCGAAGAG AATCCAGTCATCTGCC ... ATCCACCCACCACCAG
#> ATTGGTGGTTACCGAT
#> colData names(2): celltype sizeFactor
#> reducedDimNames(2): PCA TSNE
#> mainExpName: NULL
#> altExpNames(0):
## Save SingleCellExperiment
saveRDS(spermatogenesis, file = "spermatogenesis.rds")
## Save AnnData object
zellkonverter::writeH5AD(spermatogenesis, file = "spermatogenesis_AnnData.h5ad",
X_name = "counts")sessionInfo()
#> R version 4.1.2 (2021-11-01)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] zellkonverter_1.4.0 scRNAseq_2.8.0
#> [3] scran_1.22.1 scater_1.22.0
#> [5] ggplot2_3.3.5 scuttle_1.4.0
#> [7] SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0
#> [9] Biobase_2.54.0 GenomicRanges_1.46.1
#> [11] GenomeInfoDb_1.30.1 IRanges_2.28.0
#> [13] S4Vectors_0.32.4 BiocGenerics_0.40.0
#> [15] MatrixGenerics_1.6.0 matrixStats_0.62.0
#> [17] reticulate_1.24 BiocStyle_2.22.0
#>
#> loaded via a namespace (and not attached):
#> [1] AnnotationHub_3.2.2 BiocFileCache_2.2.1
#> [3] igraph_1.3.1 lazyeval_0.2.2
#> [5] BiocParallel_1.28.3 digest_0.6.29
#> [7] ensembldb_2.18.4 htmltools_0.5.2
#> [9] viridis_0.6.2 fansi_1.0.3
#> [11] magrittr_2.0.3 memoise_2.0.1
#> [13] ScaledMatrix_1.2.0 cluster_2.1.3
#> [15] limma_3.50.3 Biostrings_2.62.0
#> [17] prettyunits_1.1.1 colorspace_2.0-3
#> [19] blob_1.2.3 rappdirs_0.3.3
#> [21] ggrepel_0.9.1 xfun_0.30
#> [23] dplyr_1.0.8 crayon_1.5.1
#> [25] RCurl_1.98-1.6 jsonlite_1.8.0
#> [27] glue_1.6.2 velociraptor_1.4.0
#> [29] gtable_0.3.0 zlibbioc_1.40.0
#> [31] XVector_0.34.0 DelayedArray_0.20.0
#> [33] BiocSingular_1.10.0 scales_1.2.0
#> [35] DBI_1.1.2 edgeR_3.36.0
#> [37] Rcpp_1.0.8.3 viridisLite_0.4.0
#> [39] xtable_1.8-4 progress_1.2.2
#> [41] dqrng_0.3.0 bit_4.0.4
#> [43] rsvd_1.0.5 metapod_1.2.0
#> [45] httr_1.4.2 dir.expiry_1.2.0
#> [47] ellipsis_0.3.2 farver_2.1.0
#> [49] pkgconfig_2.0.3 XML_3.99-0.9
#> [51] sass_0.4.1 dbplyr_2.1.1
#> [53] locfit_1.5-9.5 utf8_1.2.2
#> [55] labeling_0.4.2 tidyselect_1.1.2
#> [57] rlang_1.0.2 later_1.3.0
#> [59] AnnotationDbi_1.56.2 munsell_0.5.0
#> [61] BiocVersion_3.14.0 tools_4.1.2
#> [63] cachem_1.0.6 cli_3.2.0
#> [65] generics_0.1.2 RSQLite_2.2.12
#> [67] ExperimentHub_2.2.1 evaluate_0.15
#> [69] stringr_1.4.0 fastmap_1.1.0
#> [71] yaml_2.3.5 knitr_1.38
#> [73] bit64_4.0.5 purrr_0.3.4
#> [75] AnnotationFilter_1.18.0 KEGGREST_1.34.0
#> [77] sparseMatrixStats_1.6.0 mime_0.12
#> [79] xml2_1.3.3 biomaRt_2.50.3
#> [81] compiler_4.1.2 rstudioapi_0.13
#> [83] beeswarm_0.4.0 filelock_1.0.2
#> [85] curl_4.3.2 png_0.1-7
#> [87] interactiveDisplayBase_1.32.0 tibble_3.1.6
#> [89] statmod_1.4.36 bslib_0.3.1
#> [91] stringi_1.7.6 highr_0.9
#> [93] basilisk.utils_1.6.0 GenomicFeatures_1.46.5
#> [95] lattice_0.20-45 bluster_1.4.0
#> [97] ProtGenerics_1.26.0 Matrix_1.4-1
#> [99] vctrs_0.4.1 pillar_1.7.0
#> [101] lifecycle_1.0.1 BiocManager_1.30.17
#> [103] jquerylib_0.1.4 BiocNeighbors_1.12.0
#> [105] cowplot_1.1.1 bitops_1.0-7
#> [107] irlba_2.3.5 httpuv_1.6.5
#> [109] rtracklayer_1.54.0 R6_2.5.1
#> [111] BiocIO_1.4.0 promises_1.2.0.1
#> [113] gridExtra_2.3 vipor_0.4.5
#> [115] assertthat_0.2.1 rjson_0.2.21
#> [117] withr_2.5.0 GenomicAlignments_1.30.0
#> [119] Rsamtools_2.10.0 GenomeInfoDbData_1.2.7
#> [121] parallel_4.1.2 hms_1.1.1
#> [123] grid_4.1.2 beachmat_2.10.0
#> [125] basilisk_1.6.0 rmarkdown_2.13
#> [127] DelayedMatrixStats_1.16.0 Rtsne_0.16
#> [129] shiny_1.7.1 ggbeeswarm_0.6.0
#> [131] restfulr_0.0.13